knitr::opts_chunk$set(echo = TRUE, root.dir = here::here())

library(dplyr)
library(ggplot2)
library(googledrive)
library(data.table)
set.seed(2019)

Meta-analysis of genetic correlation studies between Parkinson’s Disease and other phenotypes.

Prepare data

library(phenomix)
## API: public: http://gwas-api.mrcieu.ac.uk/
## Registered S3 method overwritten by 'spatstat.geom':
##   method     from
##   print.boxx cli
data("DEGAS_seurat")

pd_traits <- DEGAS_seurat@meta.data[grepl("parkinson",DEGAS_seurat@meta.data$label_phe, ignore.case = TRUE),]

trait_key <- setNames(DEGAS_seurat@meta.data$label_phe,
                      DEGAS_seurat@meta.data$label_phe_code)

degas_corr <- cor(Matrix::t(DEGAS_seurat@reductions$contributionGene@cell.embeddings))[,rownames(pd_traits)] %>%
  data.table::data.table(keep.rownames = "Trait2_code")
degas_corr[,Trait2:=trait_key[Trait2_code],] 
 
degas_corr <- melt(degas_corr, 
                   id.vars=c("Trait2_code","Trait2"), 
                   variable.name = "Trait1_code", 
                   value.name = "corr",
                   #### Important! otherwise will screw up dict #@##
                   variable.factor = FALSE)
degas_corr[,Trait1:=trait_key[Trait1_code],]


data.table::setorder(degas_corr, -corr)
data.table::setcolorder(degas_corr,
                        c("Trait1","Trait2",
                          "Trait1_code","Trait2_code","corr"))

# data.table::fwrite(degas_corr,
                # here::here("data/DEGAS/DEGAS_contributionGene_corr.csv"))

Download

googledrive::drive_download("https://docs.google.com/spreadsheets/d/19jz9l2P7W2f1PWT9t3x0L8h6VQL021RdRI7D4FAOjX0/edit?usp=sharing#gid=1366974120", path = here::here("data/metaanalysis/TableS2.xlsx"), overwrite = T)
## ! Using an auto-discovered, cached token.
##   To suppress this message, modify your code or options to clearly consent to
##   the use of a cached token.
##   See gargle's "Non-interactive auth" vignette for more details:
##   <https://gargle.r-lib.org/articles/non-interactive-auth.html>
## ℹ The googledrive package is using a cached token for
##   'brian_schilder@alumni.brown.edu'.
## File downloaded:
## • 'TableS2' <id: 19jz9l2P7W2f1PWT9t3x0L8h6VQL021RdRI7D4FAOjX0>
## Saved locally as:
## • '/Desktop/PD_omics_review/data/metaanalysis/TableS2.xlsx'
sheets <- readxl::excel_sheets(here::here("data/metaanalysis/TableS2.xlsx")) 

Merge datasets

corr_sheets <- sheets[endsWith(sheets,"_corr")]
 
corr_data <- lapply(corr_sheets, function(sheet){
  print(sheet)
  dat <- readxl::read_excel(here::here("data/metaanalysis/TableS2.xlsx"), sheet = sheet) 
  dat$Sheet <- gsub("_corr","",sheet) 
  cols <- c("Trait1","Trait2","corr",
            "Reference","Source","Dataset")  
  cols <- cols[cols %in% colnames(dat)]
  return(dplyr::relocate(dat, all_of(cols), .before = 1))
}) %>% `names<-`(corr_sheets) %>%
  data.table::rbindlist(fill = TRUE)
## [1] "Bryois2020_corr"
## [1] "Nalls2019_corr"
## [1] "Agarwal2020_corr"
## [1] "DeGAs_corr"
## [1] "dPRS_corr"

Harmonise names

Attempt to somewhat harmonise phenotype names across studies.

corr_data <- corr_data %>% 
  dplyr::mutate(Phenotype = stringr::str_trunc(stringr::str_wrap(tolower(Trait2), 50),50)
) 

Top traits

Get the top N traits per study that are most highly correlated with Parkinson’s Disease.

Gather

max_traits <- 10
  
top_corr <- corr_data %>% 
  subset(Trait1!=Trait2) %>%
  dplyr::group_by(Sheet, Trait1) %>%
  dplyr::mutate(Phenotype = paste0(Phenotype,"@",group_indices())) %>%
  dplyr::slice_max(order_by = corr, n = max_traits) %>%
  dplyr::select(Sheet, Trait1, Trait2, Phenotype, corr, Source) %>% 
  dplyr::ungroup() %>%
  dplyr::arrange(corr)
## Warning: `group_indices()` was deprecated in dplyr 1.0.0.
## Please use `cur_group_id()` instead.
top_corr$Phenotype <- factor(top_corr$Phenotype, 
                          unique(top_corr$Phenotype),
                          ordered = TRUE)
knitr::kable(top_corr)
Sheet Trait1 Trait2 Phenotype corr Source
Bryois2020 Parkinson’s disease Stroke 0.0197000 Supplementary Table 4
Bryois2020 Parkinson’s disease Bipolar 0.0244000 Supplementary Table 4
Bryois2020 Parkinson’s disease Waist to hip ratio adjusted for BMI waist to hip ratio adjusted for 0.0325000 Supplementary Table 4
Bryois2020 Parkinson’s disease Intelligence 0.0448000 Supplementary Table 4
Bryois2020 Parkinson’s disease Educational attainment educational 0.0460000 Supplementary Table 4
Bryois2020 Parkinson’s disease Hippocampal volume hippocampal 0.0573000 Supplementary Table 4
Bryois2020 Parkinson’s disease Alzheimer’s disease alzheimer’s 0.0776000 Supplementary Table 4
Bryois2020 Parkinson’s disease Anorexia 0.0900000 Supplementary Table 4
Bryois2020 Parkinson’s disease Amyotrophic lateral sclerosis amyotrophic lateral 0.1884000 Supplementary Table 4
Nalls2019 Parkinson’s Disease Mean Pallidum mean 0.2332000 Supplementary Table 10
Nalls2019 Parkinson’s Disease Number of older siblings number of older 0.2470000 Supplementary Table 10
Nalls2019 Parkinson’s Disease Mean Putamen mean 0.2481000 Supplementary Table 10
Nalls2019 Parkinson’s Disease Diagnoses - main ICD10: C61 Malignant neoplasm of prostate diagnoses - main icd10: c61 malignant neoplasm …@5 0.2512000 Supplementary Table 10
Nalls2019 Parkinson’s Disease GCSF.sumstats.gz 0.2636000 Supplementary Table 10
Nalls2019 Parkinson’s Disease Underlying (primary) cause of death: ICD10: J84.1 Other interstitial pulmonary diseases with fibrosis underlying (primary) cause of death: icd10:
j84…@5 0.2696000 Supplementary Table 10
Nalls2019 Parkinson’s Disease Mean Accumbens mean 0.2770000 Supplementary Table 10
Bryois2020 Parkinson’s disease Intracranial volume intracranial 0.2851000 Supplementary Table 4
Agarwal2020 Parkinson disease Coronary Artery Disease coronary artery 0.2908336 Computed by review authors
Nalls2019 Parkinson’s Disease Cancer code_ self-reported: small intestine/small bowel cancer cancer code_ self-reported: small intestine/sma…@5 0.2971000 Supplementary Table 10
Agarwal2020 Parkinson disease Ulcerative colitis ulcerative 0.3022025 Computed by review authors
Nalls2019 Parkinson’s Disease ICV 0.3511000 Supplementary Table 10
Agarwal2020 Parkinson disease Anxiety 0.3739806 Computed by review authors
Nalls2019 Parkinson’s Disease Diagnoses - main ICD10: B37 Candidiasis diagnoses - main icd10: b37 0.3822000 Supplementary Table 10
Agarwal2020 Parkinson disease ADHD 0.3851822 Computed by review authors
Agarwal2020 Parkinson disease Schizophrenia (2018) schizophrenia (2018)@1 0.4534542 Computed by review authors
Agarwal2020 Parkinson disease Intelligence 0.4798319 Computed by review authors
DeGAs Parkinson’s disease helicobacter pylori helicobacter 0.5041095 Computed by review authors
DeGAs Parkinson’s disease gestational hypertension/pre-eclampsia gestational 0.5083302 Computed by review authors
DeGAs Parkinson’s disease Age hayfever or allergic rhinitis diagnosed by doctor age hayfever or allergic rhinitis diagnosed by
@3 0.5105433 Computed by review authors
DeGAs Parkinson’s disease Number of children fathered number of children 0.5118207 Computed by review authors
DeGAs Parkinson’s disease chickenpox 0.5452407 Computed by review authors
DeGAs Parkinson’s disease metoclopramide 0.5535843 Computed by review authors
DeGAs Parkinson’s disease Mean ISOVF in fornix cres+stria terminalis on FA skeleton (left) mean isovf in fornix cres+stria terminalis on f…@3 0.5675078 Computed by review authors
DeGAs Parkinson’s disease Mean ISOVF in fornix cres+stria terminalis on FA skeleton (right) mean isovf in fornix cres+stria terminalis on f…@3 0.5870492 Computed by review authors
DeGAs Parkinson’s disease pulmonary fibrosis pulmonary 0.5958769 Computed by review authors
Agarwal2020 Parkinson disease BMI 0.6242659 Computed by review authors
Agarwal2020 Parkinson disease Tourette Syndrome tourette 0.6766420 Computed by review authors
DeGAs Parkinson’s disease anal fissure anal 0.6783448 Computed by review authors
Agarwal2020 Parkinson disease Amyotrophic Lateral Sclerosis amyotrophic lateral 0.7070386 Computed by review authors
Agarwal2020 Parkinson disease Epilepsy (General) epilepsy (general)@1 0.7243838 Computed by review authors
DeGAs parkinsons disease connective tissue disorder connective tissue 0.8339059 Computed by review authors
DeGAs parkinsons disease movicol oral powder movicol oral 0.8404283 Computed by review authors
DeGAs parkinsons disease fracture pelvis fracture 0.8412213 Computed by review authors
DeGAs parkinsons disease fracture finger fracture 0.8427052 Computed by review authors
DeGAs parkinsons disease finasteride 0.8441802 Computed by review authors
DeGAs parkinsons disease bile duct obstruction/ascending cholangitis bile duct obstruction/ascending 0.8465642 Computed by review authors
DeGAs parkinsons disease sclerosing cholangitis sclerosing 0.8489558 Computed by review authors
DeGAs parkinsons disease pneumothorax 0.8494077 Computed by review authors
DeGAs parkinsons disease alcoholic liver disease/alcoholic cirrhosis alcoholic liver disease/alcoholic 0.8557067 Computed by review authors
DeGAs parkinsons disease fracture patella/knee fracture 0.8606616 Computed by review authors

Plot

corr_plot <- ggplot(top_corr, 
                    aes(x=corr, y=Phenotype, fill=Sheet)) +
  facet_grid(facets = Sheet ~., 
             scales = "free_y") + 
  geom_bar(stat="identity", aes(alpha=corr), show.legend = F) +
  labs(title = "Genetic correlations with PD",
       x="Correlation", y="Phenotype") +
  scale_fill_manual(values=pals::gnuplot(dplyr::n_distinct(top_corr$Sheet)+1 ) ) +
  scale_x_continuous(limits = c(0,1)) +
  # Create a dictionary mapping the new labels
  scale_y_discrete(labels = setNames(gsub("@.*","",levels(top_corr$Phenotype)),
                                     levels(top_corr$Phenotype) ) ) + 
  theme_bw() +
  theme(strip.background = element_rect(fill = "black"), 
        strip.text = element_text(color = "white", angle = 0),
        strip.text.y = element_text(color = "white", angle = 0),
        panel.grid.minor.x = element_blank(),
        panel.grid.minor.y = element_blank())
print(corr_plot)
## Warning: Removed 1 rows containing missing values (geom_bar).

ggsave(here::here("plots/metaanalysis/correlation_metaanalysis.pdf"),corr_plot, dpi = 300, height = 15)
## Saving 7 x 15 in image
## Warning: Removed 1 rows containing missing values (geom_bar).

Bryois 2020

bryois <- readxl::read_excel(here::here("data/metaanalysis/TableS2.xlsx"), sheet="Bryois2020_corr")

mat_bryois <- as.matrix(bryois$rg) %>% `rownames<-`(bryois$Trait2) %>% `colnames<-`("rg")

heat <- ggplot(bryois, aes(y=Trait2, x=1, fill=rg)) +
  geom_tile() + 
  theme_bw()
plotly::ggplotly(heat)

LD Hub

I began to explore this dataset but then realized it does not contain any Parkinson’s Disease GWAS.

rg = xlsx::read.xlsx(here::here("data/LD_Hub/LD-Hub_genetic_correlation_221x221_no_ENIGMA.xlsx"), sheetName = "rG") %>% 
  tibble::column_to_rownames("NA.")
colnames(rg) <- gsub("[.]","-",colnames(rg))
row.names(rg) <- gsub("[.]","-",row.names(rg))
rg[rg=="/"] <- NA 
rg <- Matrix::as.matrix(rg, sparse = T)


rp = xlsx::read.xlsx(here::here("data/LD-Hub_genetic_correlation_221x221_no_ENIGMA.xlsx"), sheetName = "rP")%>%
    tibble::column_to_rownames("NA.")
colnames(rp) <- gsub("[.]","-",colnames(rp))
row.names(rp) <- gsub("[.]","-",row.names(rp))
rp[rp=="/"] <- NA 
rp <- Matrix::as.matrix(rp, sparse = T)

Check for Parkinson’s

grep("Parkinson",colnames(rp), ignore.case = T, value = T)

Session Info

utils::sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.8.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] phenomix_0.2.0    data.table_1.14.0 googledrive_2.0.0 ggplot2_3.3.5    
## [5] dplyr_1.0.7      
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2                  reticulate_1.20            
##   [3] R.utils_2.10.1              lme4_1.1-27.1              
##   [5] tidyselect_1.1.1            RSQLite_2.2.7              
##   [7] AnnotationDbi_1.55.1        htmlwidgets_1.5.3          
##   [9] grid_4.1.0                  BiocParallel_1.27.3        
##  [11] Rtsne_0.15                  munsell_0.5.0              
##  [13] ragg_1.1.3                  codetools_0.2-18           
##  [15] ica_1.0-2                   future_1.21.0              
##  [17] miniUI_0.1.1.1              withr_2.4.2                
##  [19] colorspace_2.0-2            Biobase_2.53.0             
##  [21] filelock_1.0.2              highr_0.9                  
##  [23] knitr_1.33                  rstudioapi_0.13            
##  [25] Seurat_4.0.3                stats4_4.1.0               
##  [27] ROCR_1.0-11                 tensor_1.5                 
##  [29] listenv_0.8.0               labeling_0.4.2             
##  [31] MatrixGenerics_1.5.3        GenomeInfoDbData_1.2.6     
##  [33] polyclip_1.10-0             farver_2.1.0               
##  [35] bit64_4.0.5                 rprojroot_2.0.2            
##  [37] parallelly_1.27.0           vctrs_0.3.8                
##  [39] generics_0.1.0              xfun_0.25                  
##  [41] BiocFileCache_2.1.1         doParallel_1.0.16          
##  [43] R6_2.5.0                    GenomeInfoDb_1.29.3        
##  [45] pals_1.7                    bitops_1.0-7               
##  [47] spatstat.utils_2.2-0        cachem_1.0.5               
##  [49] DelayedArray_0.19.1         assertthat_0.2.1           
##  [51] promises_1.2.0.1            BiocIO_1.3.0               
##  [53] scales_1.1.1                gtable_0.3.0               
##  [55] globals_0.14.0              goftest_1.2-2              
##  [57] rlang_0.4.11                systemfonts_1.0.2          
##  [59] MungeSumstats_1.1.14        splines_4.1.0              
##  [61] rtracklayer_1.53.1          lazyeval_0.2.2             
##  [63] gargle_1.2.0                dichromat_2.0-0            
##  [65] broom_0.7.9                 spatstat.geom_2.2-2        
##  [67] abind_1.4-5                 yaml_2.2.1                 
##  [69] reshape2_1.4.4              crosstalk_1.1.1            
##  [71] backports_1.2.1             GenomicFeatures_1.45.1     
##  [73] httpuv_1.6.1                tools_4.1.0                
##  [75] ellipsis_0.3.2              gplots_3.1.1               
##  [77] spatstat.core_2.3-0         jquerylib_0.1.4            
##  [79] RColorBrewer_1.1-2          BiocGenerics_0.39.1        
##  [81] ggridges_0.5.3              Rcpp_1.0.7                 
##  [83] plyr_1.8.6                  progress_1.2.2             
##  [85] zlibbioc_1.39.0             purrr_0.3.4                
##  [87] RCurl_1.98-1.4              prettyunits_1.1.1          
##  [89] openssl_1.4.4               rpart_4.1-15               
##  [91] deldir_0.2-10               pbapply_1.4-3              
##  [93] cowplot_1.1.1               S4Vectors_0.31.0           
##  [95] zoo_1.8-9                   SeuratObject_4.0.2         
##  [97] SummarizedExperiment_1.23.1 ggrepel_0.9.1              
##  [99] cluster_2.1.2               fs_1.5.0                   
## [101] here_1.0.1                  variancePartition_1.23.1   
## [103] magrittr_2.0.1              scattermore_0.7            
## [105] lmtest_0.9-38               RANN_2.6.1                 
## [107] fitdistrplus_1.1-5          matrixStats_0.60.0         
## [109] hms_1.1.0                   patchwork_1.1.1            
## [111] mime_0.11                   evaluate_0.14              
## [113] xtable_1.8-4                pbkrtest_0.5.1             
## [115] XML_3.99-0.7                readxl_1.3.1               
## [117] sparsesvd_0.2               IRanges_2.27.0             
## [119] gridExtra_2.3               compiler_4.1.0             
## [121] biomaRt_2.49.4              maps_3.3.0                 
## [123] tibble_3.1.3                KernSmooth_2.23-20         
## [125] crayon_1.4.1                minqa_1.2.4                
## [127] R.oo_1.24.0                 htmltools_0.5.1.1          
## [129] mgcv_1.8-36                 later_1.2.0                
## [131] tidyr_1.1.3                 DBI_1.1.1                  
## [133] gprofiler2_0.2.0            dbplyr_2.1.1               
## [135] MASS_7.3-54                 rappdirs_0.3.3             
## [137] boot_1.3-28                 Matrix_1.3-4               
## [139] cli_3.0.1                   R.methodsS3_1.8.1          
## [141] parallel_4.1.0              igraph_1.2.6               
## [143] GenomicRanges_1.45.0        pkgconfig_2.0.3            
## [145] GenomicAlignments_1.29.0    plotly_4.9.4.9000          
## [147] spatstat.sparse_2.0-0       foreach_1.5.1              
## [149] xml2_1.3.2                  bslib_0.2.5.1              
## [151] XVector_0.33.0              GeneOverlap_1.29.0         
## [153] stringr_1.4.0               VariantAnnotation_1.39.0   
## [155] digest_0.6.27               sctransform_0.3.2          
## [157] RcppAnnoy_0.0.19            spatstat.data_2.1-0        
## [159] Biostrings_2.61.2           cellranger_1.1.0           
## [161] rmarkdown_2.10              leiden_0.3.9               
## [163] uwot_0.1.10.9000            restfulr_0.0.13            
## [165] curl_4.3.2                  shiny_1.6.0                
## [167] Rsamtools_2.9.1             gtools_3.9.2               
## [169] nloptr_1.2.2.2              rjson_0.2.20               
## [171] nlme_3.1-152                lifecycle_1.0.0            
## [173] jsonlite_1.7.2              mapproj_1.2.7              
## [175] askpass_1.1                 limma_3.49.4               
## [177] viridisLite_0.4.0           BSgenome_1.61.0            
## [179] fansi_0.5.0                 pillar_1.6.2               
## [181] lattice_0.20-44             KEGGREST_1.33.0            
## [183] fastmap_1.1.0               httr_1.4.2                 
## [185] survival_3.2-12             googleAuthR_1.4.0          
## [187] glue_1.4.2                  iterators_1.0.13           
## [189] png_0.1-7                   bit_4.0.4                  
## [191] stringi_1.7.3               sass_0.4.0                 
## [193] blob_1.2.2                  textshaping_0.3.5          
## [195] caTools_1.18.2              memoise_2.0.0              
## [197] irlba_2.3.3                 future.apply_1.8.1